From: Keir Fraser Date: Tue, 18 May 2010 14:05:54 +0000 (+0100) Subject: x86: Pull dynamic memory allocation out of do_boot_cpu(). X-Git-Tag: archive/raspbian/4.8.0-1+rpi1~1^2~12128 X-Git-Url: https://dgit.raspbian.org/%22http:/www.example.com/cgi/%22https:/%22bookmarks://%22/%22http:/www.example.com/cgi/%22https:/%22bookmarks:/%22?a=commitdiff_plain;h=c12bbde846af61d1b8441da6c627181ccbc7a157;p=xen.git x86: Pull dynamic memory allocation out of do_boot_cpu(). This has two advantages: (a) We can move the allocations to a context where we can handle failure. (b) We can implement matching deallocations on CPU offline. Only the idle vcpu structure is now not freed on CPU offline. This probably does not really matter. Signed-off-by: Keir Fraser --- diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c index 166a8be31f..b442a874ef 100644 --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -99,7 +99,7 @@ static void default_dead_idle(void) static void play_dead(void) { /* This must be done before dead CPU ack */ - cpu_exit_clear(); + cpu_exit_clear(smp_processor_id()); wbinvd(); mb(); /* Ack it */ diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c index 700188e406..bebc9c58d8 100644 --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -5331,6 +5331,13 @@ void memguard_guard_stack(void *p) memguard_guard_range(p, PAGE_SIZE); } +void memguard_unguard_stack(void *p) +{ + p = (void *)((unsigned long)p + STACK_SIZE - + PRIMARY_STACK_SIZE - PAGE_SIZE); + memguard_unguard_range(p, PAGE_SIZE); +} + /* * Local variables: * mode: C diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c index ddad0a423c..8db5d0a152 100644 --- a/xen/arch/x86/smpboot.c +++ b/xen/arch/x86/smpboot.c @@ -336,7 +336,7 @@ void start_secondary(void *unused) /* This must be done before setting cpu_online_map */ spin_debug_enable(); - set_cpu_sibling_map(raw_smp_processor_id()); + set_cpu_sibling_map(smp_processor_id()); wmb(); /* @@ -545,24 +545,11 @@ int alloc_cpu_id(void) return (cpu < 
NR_CPUS) ? cpu : -ENODEV; } -static void *prepare_idle_stack(unsigned int cpu) -{ - if ( !stack_base[cpu] ) - stack_base[cpu] = alloc_xenheap_pages(STACK_ORDER, 0); - return stack_base[cpu]; -} - static int do_boot_cpu(int apicid, int cpu) { unsigned long boot_error; - unsigned int order; int timeout; unsigned long start_eip; - struct vcpu *v; - struct desc_struct *gdt; -#ifdef __x86_64__ - struct page_info *page; -#endif /* * Save current MTRR state in case it was changed since early boot @@ -572,62 +559,15 @@ static int do_boot_cpu(int apicid, int cpu) booting_cpu = cpu; - v = alloc_idle_vcpu(cpu); - BUG_ON(v == NULL); - /* start_eip had better be page-aligned! */ start_eip = setup_trampoline(); /* So we see what's up */ - if (opt_cpu_info) + if ( opt_cpu_info ) printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip); - stack_start.esp = prepare_idle_stack(cpu); - - /* Debug build: detect stack overflow by setting up a guard page. */ - memguard_guard_stack(stack_start.esp); - - gdt = per_cpu(gdt_table, cpu); - if ( gdt == boot_cpu_gdt_table ) - { - order = get_order_from_pages(NR_RESERVED_GDT_PAGES); -#ifdef __x86_64__ - page = alloc_domheap_pages(NULL, order, - MEMF_node(cpu_to_node(cpu))); - per_cpu(compat_gdt_table, cpu) = gdt = page_to_virt(page); - memcpy(gdt, boot_cpu_compat_gdt_table, - NR_RESERVED_GDT_PAGES * PAGE_SIZE); - gdt[PER_CPU_GDT_ENTRY - FIRST_RESERVED_GDT_ENTRY].a = cpu; - page = alloc_domheap_pages(NULL, order, - MEMF_node(cpu_to_node(cpu))); - per_cpu(gdt_table, cpu) = gdt = page_to_virt(page); -#else - per_cpu(gdt_table, cpu) = gdt = alloc_xenheap_pages(order, 0); -#endif - memcpy(gdt, boot_cpu_gdt_table, - NR_RESERVED_GDT_PAGES * PAGE_SIZE); - BUILD_BUG_ON(NR_CPUS > 0x10000); - gdt[PER_CPU_GDT_ENTRY - FIRST_RESERVED_GDT_ENTRY].a = cpu; - } - -#ifdef __i386__ - if ( !per_cpu(doublefault_tss, cpu) ) - { - per_cpu(doublefault_tss, cpu) = alloc_xenheap_page(); - memset(per_cpu(doublefault_tss, cpu), 0, PAGE_SIZE); - } -#else - if ( 
!per_cpu(compat_arg_xlat, cpu) ) - setup_compat_arg_xlat(cpu, cpu_to_node[cpu]); -#endif - - if ( !idt_tables[cpu] ) - { - idt_tables[cpu] = xmalloc_array(idt_entry_t, IDT_ENTRIES); - memcpy(idt_tables[cpu], idt_table, - IDT_ENTRIES*sizeof(idt_entry_t)); - } + stack_start.esp = stack_base[cpu]; /* This grunge runs the startup process for the targeted processor. */ @@ -677,16 +617,7 @@ static int do_boot_cpu(int apicid, int cpu) } if ( boot_error ) - { - /* Try to put things back the way they were before ... */ - unmap_cpu_to_logical_apicid(cpu); - cpu_clear(cpu, cpu_callout_map); /* was set here */ - cpu_uninit(cpu); /* undoes cpu_init() */ - - /* Mark the CPU as non-present */ - x86_cpu_to_apicid[cpu] = BAD_APICID; - cpu_clear(cpu, cpu_present_map); - } + cpu_exit_clear(cpu); /* mark "stuck" area as not stuck */ bootsym(trampoline_cpu_started) = 0; @@ -697,10 +628,8 @@ static int do_boot_cpu(int apicid, int cpu) return boot_error ? -EIO : 0; } -void cpu_exit_clear(void) +void cpu_exit_clear(unsigned int cpu) { - int cpu = raw_smp_processor_id(); - cpu_uninit(cpu); cpu_clear(cpu, cpu_callout_map); @@ -710,8 +639,127 @@ void cpu_exit_clear(void) unmap_cpu_to_logical_apicid(cpu); } +static void cpu_smpboot_free(unsigned int cpu) +{ + unsigned int order; + + xfree(idt_tables[cpu]); + idt_tables[cpu] = NULL; + +#ifdef __x86_64__ + free_compat_arg_xlat(cpu); +#endif + + order = get_order_from_pages(NR_RESERVED_GDT_PAGES); +#ifdef __x86_64__ + if ( per_cpu(compat_gdt_table, cpu) ) + free_domheap_pages(virt_to_page(per_cpu(compat_gdt_table, cpu)), order); + if ( per_cpu(gdt_table, cpu) ) + free_domheap_pages(virt_to_page(per_cpu(gdt_table, cpu)), + order); + per_cpu(compat_gdt_table, cpu) = NULL; +#else + free_xenheap_pages(per_cpu(gdt_table, cpu), order); +#endif + per_cpu(gdt_table, cpu) = NULL; + + if ( stack_base[cpu] != NULL ) + { + memguard_unguard_stack(stack_base[cpu]); + free_xenheap_pages(stack_base[cpu], STACK_ORDER); + stack_base[cpu] = NULL; + } +} + +static 
int cpu_smpboot_alloc(unsigned int cpu) +{ + unsigned int order; + struct desc_struct *gdt; +#ifdef __x86_64__ + struct page_info *page; +#endif + + if ( alloc_idle_vcpu(cpu) == NULL ) + goto oom; + + stack_base[cpu] = alloc_xenheap_pages(STACK_ORDER, 0); + if ( stack_base[cpu] == NULL ) + goto oom; + memguard_guard_stack(stack_base[cpu]); + + order = get_order_from_pages(NR_RESERVED_GDT_PAGES); +#ifdef __x86_64__ + page = alloc_domheap_pages(NULL, order, + MEMF_node(cpu_to_node(cpu))); + if ( !page ) + goto oom; + per_cpu(compat_gdt_table, cpu) = gdt = page_to_virt(page); + memcpy(gdt, boot_cpu_compat_gdt_table, + NR_RESERVED_GDT_PAGES * PAGE_SIZE); + gdt[PER_CPU_GDT_ENTRY - FIRST_RESERVED_GDT_ENTRY].a = cpu; + page = alloc_domheap_pages(NULL, order, + MEMF_node(cpu_to_node(cpu))); + if ( !page ) + goto oom; + per_cpu(gdt_table, cpu) = gdt = page_to_virt(page); +#else + per_cpu(gdt_table, cpu) = gdt = alloc_xenheap_pages(order, 0); + if ( !gdt ) + goto oom; +#endif + memcpy(gdt, boot_cpu_gdt_table, + NR_RESERVED_GDT_PAGES * PAGE_SIZE); + BUILD_BUG_ON(NR_CPUS > 0x10000); + gdt[PER_CPU_GDT_ENTRY - FIRST_RESERVED_GDT_ENTRY].a = cpu; + +#ifdef __x86_64__ + if ( setup_compat_arg_xlat(cpu, cpu_to_node[cpu]) ) + goto oom; +#endif + + idt_tables[cpu] = xmalloc_array(idt_entry_t, IDT_ENTRIES); + if ( idt_tables[cpu] == NULL ) + goto oom; + memcpy(idt_tables[cpu], idt_table, + IDT_ENTRIES*sizeof(idt_entry_t)); + + return 0; + + oom: + cpu_smpboot_free(cpu); + return -ENOMEM; +} + +static int cpu_smpboot_callback( + struct notifier_block *nfb, unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + int rc = 0; + + switch ( action ) + { + case CPU_UP_PREPARE: + rc = cpu_smpboot_alloc(cpu); + break; + case CPU_UP_CANCELED: + case CPU_DEAD: + cpu_smpboot_free(cpu); + break; + default: + break; + } + + return !rc ? 
NOTIFY_DONE : notifier_from_errno(rc); +} + +static struct notifier_block cpu_smpboot_nfb = { + .notifier_call = cpu_smpboot_callback +}; + void __init smp_prepare_cpus(unsigned int max_cpus) { + register_cpu_notifier(&cpu_smpboot_nfb); + mtrr_aps_sync_begin(); /* Setup boot CPU information */ diff --git a/xen/arch/x86/x86_32/traps.c b/xen/arch/x86/x86_32/traps.c index 74f479e971..eb6154c34f 100644 --- a/xen/arch/x86/x86_32/traps.c +++ b/xen/arch/x86/x86_32/traps.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -191,10 +192,41 @@ void show_page_walk(unsigned long addr) unmap_domain_page(l1t); } -DEFINE_PER_CPU_READ_MOSTLY(struct tss_struct *, doublefault_tss); +static DEFINE_PER_CPU_READ_MOSTLY(struct tss_struct *, doublefault_tss); static unsigned char __attribute__ ((__section__ (".bss.page_aligned"))) boot_cpu_doublefault_space[PAGE_SIZE]; +static int cpu_doublefault_tss_callback( + struct notifier_block *nfb, unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + void *p; + int rc = 0; + + switch ( action ) + { + case CPU_UP_PREPARE: + per_cpu(doublefault_tss, cpu) = p = alloc_xenheap_page(); + if ( p == NULL ) + rc = -ENOMEM; + else + memset(p, 0, PAGE_SIZE); + break; + case CPU_UP_CANCELED: + case CPU_DEAD: + free_xenheap_page(per_cpu(doublefault_tss, cpu)); + break; + default: + break; + } + + return !rc ? NOTIFY_DONE : notifier_from_errno(rc); +} + +static struct notifier_block cpu_doublefault_tss_nfb = { + .notifier_call = cpu_doublefault_tss_callback +}; + asmlinkage void do_double_fault(void) { struct tss_struct *tss; @@ -300,18 +332,23 @@ static void set_task_gate(unsigned int n, unsigned int sel) void __devinit subarch_percpu_traps_init(void) { - struct tss_struct *tss = this_cpu(doublefault_tss); + struct tss_struct *tss; asmlinkage int hypercall(void); + int cpu = smp_processor_id(); - if ( !tss ) + if ( cpu == 0 ) { /* The hypercall entry vector is only accessible from ring 1. 
*/ _set_gate(idt_table+HYPERCALL_VECTOR, 14, 1, &hypercall); - tss = (void *)boot_cpu_doublefault_space; - this_cpu(doublefault_tss) = tss; + this_cpu(doublefault_tss) = (void *)boot_cpu_doublefault_space; + + register_cpu_notifier(&cpu_doublefault_tss_nfb); } + tss = this_cpu(doublefault_tss); + BUG_ON(tss == NULL); + /* * Make a separate task for double faults. This will get us debug output if * we blow the kernel stack. diff --git a/xen/arch/x86/x86_64/mm.c b/xen/arch/x86/x86_64/mm.c index 71d0a11ea5..357a80a5d7 100644 --- a/xen/arch/x86/x86_64/mm.c +++ b/xen/arch/x86/x86_64/mm.c @@ -810,20 +810,22 @@ void __init zap_low_mappings(void) int __cpuinit setup_compat_arg_xlat(unsigned int cpu, int node) { unsigned int order = get_order_from_bytes(COMPAT_ARG_XLAT_SIZE); - unsigned long sz = PAGE_SIZE << order; unsigned int memflags = node != NUMA_NO_NODE ? MEMF_node(node) : 0; struct page_info *pg; - pg = alloc_domheap_pages(NULL, order, memflags); - if ( !pg ) - return -ENOMEM; - - for ( ; (sz -= PAGE_SIZE) >= COMPAT_ARG_XLAT_SIZE; ++pg ) - free_domheap_page(pg); + BUG_ON((PAGE_SIZE << order) != COMPAT_ARG_XLAT_SIZE); - per_cpu(compat_arg_xlat, cpu) = page_to_virt(pg); + pg = alloc_domheap_pages(NULL, order, memflags); + per_cpu(compat_arg_xlat, cpu) = pg ? page_to_virt(pg) : NULL; + return pg ? 
0 : -ENOMEM; +} - return 0; +void __cpuinit free_compat_arg_xlat(unsigned int cpu) +{ + unsigned int order = get_order_from_bytes(COMPAT_ARG_XLAT_SIZE); + if ( per_cpu(compat_arg_xlat, cpu) != NULL ) + free_domheap_pages(virt_to_page(per_cpu(compat_arg_xlat, cpu)), order); + per_cpu(compat_arg_xlat, cpu) = NULL; } void cleanup_frame_table(struct mem_hotadd_info *info) diff --git a/xen/common/cpu.c b/xen/common/cpu.c index 7f6891b786..b7bf377e00 100644 --- a/xen/common/cpu.c +++ b/xen/common/cpu.c @@ -161,7 +161,7 @@ int disable_nonboot_cpus(void) { int cpu, error = 0; - BUG_ON(raw_smp_processor_id() != 0); + BUG_ON(smp_processor_id() != 0); cpus_clear(frozen_cpus); diff --git a/xen/include/asm-x86/desc.h b/xen/include/asm-x86/desc.h index 20eb8dbe5f..a072e33ede 100644 --- a/xen/include/asm-x86/desc.h +++ b/xen/include/asm-x86/desc.h @@ -197,8 +197,6 @@ do { \ (((u32)(addr) & 0x00FF0000U) >> 16); \ } while (0) -DECLARE_PER_CPU(struct tss_struct *, doublefault_tss); - #endif struct desc_ptr { diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h index 1cff19bb4e..26a1a680f2 100644 --- a/xen/include/asm-x86/mm.h +++ b/xen/include/asm-x86/mm.h @@ -485,6 +485,7 @@ void memguard_unguard_range(void *p, unsigned long l); #endif void memguard_guard_stack(void *p); +void memguard_unguard_stack(void *p); int ptwr_do_page_fault(struct vcpu *, unsigned long, struct cpu_user_regs *); diff --git a/xen/include/asm-x86/smp.h b/xen/include/asm-x86/smp.h index 8121f82f36..60f516c640 100644 --- a/xen/include/asm-x86/smp.h +++ b/xen/include/asm-x86/smp.h @@ -56,7 +56,7 @@ extern u32 cpu_2_logical_apicid[]; DECLARE_PER_CPU(int, cpu_state); #define cpu_is_offline(cpu) unlikely(!cpu_online(cpu)) -extern void cpu_exit_clear(void); +extern void cpu_exit_clear(unsigned int cpu); extern void cpu_uninit(unsigned int cpu); int cpu_add(uint32_t apic_id, uint32_t acpi_id, uint32_t pxm); diff --git a/xen/include/asm-x86/x86_64/uaccess.h b/xen/include/asm-x86/x86_64/uaccess.h index 
40f5e4210d..c5cb386d2b 100644 --- a/xen/include/asm-x86/x86_64/uaccess.h +++ b/xen/include/asm-x86/x86_64/uaccess.h @@ -5,6 +5,7 @@ #define COMPAT_ARG_XLAT_SIZE (2*PAGE_SIZE) DECLARE_PER_CPU(void *, compat_arg_xlat); int setup_compat_arg_xlat(unsigned int cpu, int node); +void free_compat_arg_xlat(unsigned int cpu); #define is_compat_arg_xlat_range(addr, size) ({ \ unsigned long __off; \ __off = (unsigned long)(addr) - (unsigned long)COMPAT_ARG_XLAT_VIRT_BASE; \